In [ ]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import statistics
import seaborn as sns

def print_full_dataframe(df):
    with pd.option_context('display.max_columns', None, 'display.expand_frame_repr', False):
        print(df)

df = pd.read_csv('master.csv')
df2 = pd.read_csv('share-with-anxiety-disorders.csv')
df3 = pd.read_csv('Countries_GDP_1960-2020.csv')
df4 = pd.read_csv('DP_LIVE_13042023005821788.csv', encoding='latin1')


print("df.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df.isnull().sum())
print()
print()
print()
print()

print("df2.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df2.isnull().sum())
print()
print()
print()
print()

print("df3.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
null_values_df = df3.isnull().sum().to_frame().T
print_full_dataframe(null_values_df)
print()
print()
print()
print()

print("df4.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df4.isnull().sum())
print()
print()
print()
print()

print("df")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df)
print()
print()
print()
print()

print("df2")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df2)
print()
print()
print()
print()

print("df3")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df3)
print()
print()
print()
print()

print("df4")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df4)
print()
print()
print()
print()

#Start filtering the Dataframes
df['HDI for year'].fillna(df['HDI for year'].mean(), inplace=True)# convert the ' gdp_for_year ($) ' column to a numeric type
df[' gdp_for_year ($) '] = df[' gdp_for_year ($) '].str.replace(',', '').astype(float)
filtered_df = df[(df['year'] >= 2000) & (df['year'] <= 2015)]
df2_filtered = df2[(df2['Year'] >= 2000) & (df2['Year'] <= 2015)]
df2_filtered = df2_filtered.drop('Code', axis=1)
years_to_keep = [str(year) for year in range(2000, 2016)]  # List of strings containing years from 2000 to 2015
columns_to_keep = ['Country Name', 'Country Code'] + years_to_keep
df3_filtered = df3[columns_to_keep]
df4_filtered = df4[(df4['TIME'] >= 2000) & (df4['TIME'] <= 2015)]
df4_filtered = df4_filtered.rename(columns={"\"LOCATION\"": "LOCATION"})
df4_filtered = df4_filtered.drop('Flag Codes', axis=1)
df4_filtered = df4_filtered.drop('SUBJECT',axis=1)
df4_filtered = df4_filtered.drop('MEASURE',axis=1)
df4_filtered = df4_filtered.drop('FREQUENCY',axis=1)
df4_filtered = df4_filtered.drop('INDICATOR',axis=1)
print("filtered_df.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(filtered_df.isnull().sum())
filtered_df.to_csv('filtered_df.csv', index=False)
print()
print()
print()
print()

print("df2_filtered.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df2_filtered.isnull().sum())
df2_filtered.to_csv('df2_filtered.csv', index=False)
print()
print()
print()
print()

print("df3_filtered.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
null_values_df = df3_filtered.isnull().sum().to_frame().T
print_full_dataframe(null_values_df)
df3_filtered.to_csv('df3_filtered.csv', index=False)
print()
print()
print()
print()

print("df4_filtered.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df4_filtered.isnull().sum())
df4_filtered.to_csv('df4_filtered.csv', index=False)
print()
print()
print()
print()

print("filtered_df")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(filtered_df)
print()
print()
print()
print()

print("df2_filtered")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df2_filtered)
print()
print()
print()
print()

print("df3_filtered")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df3_filtered)
print()
print()
print()
print()

print("df4_filtered")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(df4_filtered)
print()
print()
print()
print()

average_suicide_rates_by_country = filtered_df.groupby('country')['suicides/100k pop'].mean().sort_values(ascending=False)
print("average_suicide_rates_by_country = filtered_df.groupby('country')['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_country)
print()
print()
print()
print()

average_suicide_rates_by_country_and_year = filtered_df.groupby(['year','country'])['suicides/100k pop'].mean()
#average_suicide_rates_by_country_and_year.to_csv('average_suicide_rates_by_country_and_year.csv', index=True)
print("average_suicide_rates_by_country_and_year = filtered_df.groupby(['year','country'])['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_country_and_year)
print()
print()
print()
print()

average_suicide_rates_by_gender = filtered_df.groupby('sex')['suicides/100k pop'].mean()
print("average_suicide_rates_by_gender = filtered_df.groupby('sex')['suicides/100k pop'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_gender)
print()
print()
print()
print()

average_suicide_rates_by_gender_and_year = filtered_df.groupby(['year','sex'])['suicides/100k pop'].mean()
#average_suicide_rates_by_gender_and_year.to_csv('average_suicide_rates_by_gender_and_year.csv', index=True)
print("average_suicide_rates_by_gender_and_year = filtered_df.groupby(['year','sex'])['suicides/100k pop'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_gender_and_year)
print()
print()
print()
print()

average_suicide_rates_by_age = filtered_df.groupby('age')['suicides/100k pop'].mean().sort_values(ascending=False)
print("average_suicide_rates_by_age = filtered_df.groupby('age')['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_age)
print()
print()
print()
print()

average_suicide_rates_by_age_and_year = filtered_df.groupby(['year','age'])['suicides/100k pop'].mean()
#average_suicide_rates_by_age_and_year.to_csv('average_suicide_rates_by_age_and_year.csv', index=True)
print("average_suicide_rates_by_age_and_year = filtered_df.groupby(['year','age'])['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_age_and_year)
print()
print()
print()
print()

#calculate the correlation coefficients
correlations = filtered_df[['suicides/100k pop', 'gdp_per_capita ($)', ' gdp_for_year ($) ']].corr()
print("correlations = filtered_df[['suicides/100k pop', 'gdp_per_capita ($)', ' gdp_for_year ($) ']].corr()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(correlations)
print()
print()
print()
print()

average_suicide_rates_by_generation = filtered_df.groupby('generation')['suicides/100k pop'].mean().sort_values(ascending=False)
print("average_suicide_rates_by_generation = filtered_df.groupby('generation')['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_generation)
print()
print()
print()
print()

average_suicide_rates_by_generation_and_year = filtered_df.groupby(['year', 'generation'])['suicides/100k pop'].mean()
#average_suicide_rates_by_generation_and_year.to_csv('average_suicide_rates_by_generation_and_year.csv', index=True)
print("average_suicide_rates_by_generation_and_year = filtered_df.groupby(['year', 'generation'])['suicides/100k pop'].mean().sort_values(ascending=False)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_generation_and_year)
print()
print()
print()
print()

average_suicide_rates_by_year = filtered_df.groupby('year')['suicides/100k pop'].mean()
print("average_suicide_rates_by_year = filtered_df.groupby('year')['suicides/100k pop'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_suicide_rates_by_year)
print()
print()
print()
print()

prevalence_by_country = df2_filtered.groupby(['Entity', 'Year'])['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
print("prevalence_by_country = df2_filtered.groupby(['Entity', 'Year'])['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(prevalence_by_country)
print()
print()
print()
print()

average_prevalence_by_country = df2_filtered.groupby('Entity')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
print("average_prevalence_by_country = df2_filtered.groupby('Entity')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_prevalence_by_country)
print()
print()
print()
print()

mean_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
print("mean_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(mean_prevalence)
print()
print()
print()
print()

median_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].median()
print("median_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].median()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(median_prevalence)
print()
print()
print()
print()

total_gdp = df3_filtered.iloc[:, 2:].sum(axis=0)
print("total_gdp = df3_filtered.iloc[:, 2:].sum(axis=0)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(total_gdp)
print()
print()
print()
print()

average_gdp = df3_filtered.iloc[:, 2:].mean(axis=0)
print("average_gdp = df3_filtered.iloc[:, 2:].mean(axis=0)")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(average_gdp)
print()
print()
print()
print()

mean_unemployment = df4_filtered.groupby('TIME')['Value'].mean()
print("mean_unemployment = df4_filtered.groupby('TIME')['Value'].mean()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(mean_unemployment)
print()
print()
print()
print()

median_unemployment = df4_filtered.groupby('TIME')['Value'].median()
print("median_unemployment = df4_filtered.groupby('TIME')['Value'].median()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(median_unemployment)
print()
print()
print()
print()

# Rename columns in df2_filtered, df3_filtered, and df4_filtered to match the common columns in filtered_df
df2_filtered.rename(columns={'Entity': 'country', 'Year': 'year'}, inplace=True)
df3_filtered.rename(columns={'Country Name': 'country', 'Country Code': 'country_code'}, inplace=True)
df4_filtered.rename(columns={'LOCATION': 'country_code', 'TIME': 'year', 'Value': 'value'}, inplace=True)
filtered_df.to_csv('filtered_df.csv', index=False)
df2_filtered.to_csv('df2_filtered.csv', index=False)
df3_filtered.to_csv('df3_filtered.csv', index=False)
df4_filtered.to_csv('df4_filtered.csv', index=False)
# Merge filtered_df with df2_filtered
merged_df = pd.merge(filtered_df, df2_filtered, on=['country', 'year'], how='left')#first merge
merged_df.to_csv('merged_df.csv', index=False)
print("merged_df = filtered_df.merge(df2_filtered, on=['country', 'year'], how='left')")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(merged_df)
print()
print()
print()
print()

# Merge the resulting DataFrame with df3_filtered
# Note: df3_filtered has data in wide format, with years as columns. We need to convert it to long format before merging
df3_long = pd.melt(df3_filtered, id_vars=['country', 'country_code'], var_name='year', value_name='gdp')


# Convert 'year' column to integer type
df3_long['year'] = df3_long['year'].astype(int)

df3_long.to_csv('df3_long.csv', index=False)
merged_df = pd.merge(merged_df, df3_long, on=['country', 'year'], how='left')#second merge



merged_df.to_csv('merged_df2.csv', index=False)
print("merged_df = merged_df.merge(df3_long, on=['country', 'year'], how='left')")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(merged_df)
print()
print()
print()
print()

# Merge the resulting DataFrame with df4_filtered
merged_df = pd.merge(merged_df, df4_filtered, left_on=['country_code', 'year'], right_on=['country_code', 'year'], how='left')#last merge
merged_df.dropna(inplace=True)
merged_df = merged_df.drop('country-year',axis=1)
merged_df = merged_df.drop('HDI for year',axis=1)
merged_df = merged_df.drop('gdp',axis=1)
merged_df = merged_df.drop('country_code',axis=1)
merged_df.to_csv('merged_df3.csv', index=False)
print("merged_df = merged_df.merge(df4_filtered, on=['country', 'year'], how='left')")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(merged_df)
print()
print()
print()
print()

print("merged_df.isnull().sum()")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(merged_df.isnull().sum())
print()
print()
print()
print()

print("merged_df.describe(include='all')")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(merged_df.describe(include='all'))
print()
print()
print()
print()


grouped_data = merged_df.groupby(['country', 'year']).agg({
    'suicides_no': 'sum',
    'population': 'sum',
    ' gdp_for_year ($) ': 'first',
    'gdp_per_capita ($)': 'first',
    'Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)': 'first',
    'value': 'first'
}).reset_index()
grouped_data = grouped_data.rename(columns={"Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)": "Anxiety Prevalence (%)"})
grouped_data = grouped_data.rename(columns={"value": "Unemployment Rate(%)"})
# Calculate the new 'suicides/100k pop' rate for each group
grouped_data['suicides/100k pop'] = (grouped_data['suicides_no'] / grouped_data['population']) * 100000

# Write the result to a new CSV file
grouped_data.to_csv('transformed_data.csv', index=False)

print("grouped_data = merged_df.groupby(['country', 'year']).agg")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(grouped_data)
print()
print()
print()
print()

print("grouped_data.describe(include='all')")
print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
print(grouped_data.describe(include='all'))
print()
print()
print()
print()


tempDef2 = df4_filtered[(df4_filtered['country_code'] == 'USA')]
npArray = tempDef2.to_numpy()

npArray1 = []

for x in npArray:
  npArray1.append(x[2])

npArray1 = np.array(npArray1)
npArray1

Mean = np.mean(npArray1)
Median = np.median(npArray1)
Std = np.std(npArray1)
q1 = np.percentile(npArray1,25)
q3 = np.percentile(npArray1, 75)
IQR = q3 - q1

data = {}
data['Mean'] = Mean
data['Median'] = Median
data['Std'] = Std
data['IQR'] = IQR

Stats = list(data.keys())
values = list(data.values())
fig = plt.figure(figsize= (10,5))
plt.bar(Stats,values,color = 'blue',width = 0.4)
# - - - - - - - - - - - - - - - - - - - - - - - - - -


plt.title("Unemployment")
plt.ylabel('Value')
plt.xlabel('Statistic')
plt.show()

###############################################################################################
#check for NaN or null values
values = df3_filtered.isna()
count = values.sum()
usa_data = df3_filtered.loc[df3_filtered['country'] == 'United States', '2000':'2015']

usa_data = np.array(usa_data)
# for x in usa_data:
#    print(x)


Mean = np.mean(usa_data)
Mean1 = np.round(Mean,2)
print(Mean1)

Median =  np.median(usa_data)
Median1 = np.round(Median,2)
print(Median1)

std = np.std(usa_data)
std1 = np.round(std,2)
print(std1)

q1 = np.percentile(usa_data,25)
q3 = np.percentile(usa_data, 75)
IQR = q3 - q1
IQR1 = np.round(IQR,2)
print(IQR1)


data = {}
data['Mean'] = Mean1
data['Median'] = Median1
data['Std'] = std1
data['IQR'] = IQR1

Stats = list(data.keys())
values = list(data.values())
fig = plt.figure(figsize= (10,5))
plt.bar(Stats,values,color = 'blue',width = 0.4)
# - - - - - - - - - - - - - - - - - - - - - - - - - -
plt.title("GDP")
plt.ylabel('Value')
plt.xlabel('Statistic')
plt.show()
#####################################################################################################################
values = df2_filtered.isna()
count = values.sum()
usa_data = df2_filtered.loc[(df2_filtered['country'] == 'United States')]
#print(usa_data)
#---------------------------------------------------
anxiety_disorders = usa_data['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)']
stat = np.round(anxiety_disorders.describe(), 2)
median = np.median(stat)
print(stat)
print(median)
#------------------------------------------------------------
# Create a box plot to visualize the statistics
# Create a bar graph to visualize the statistics
fig, ax = plt.subplots()
ax.bar(stat.index, stat.values)
ax.set_title('Descriptive statistics for anxiety disorders(USA)')
ax.set_xlabel('Statistic')
ax.set_ylabel('Value')
plt.show()

#####################################################################################################################
#check for NaN or null values
values = filtered_df.isna()
count = values.sum()
#print(count)
#delete NaN columms
filtered_df = filtered_df.dropna(axis=1, how='any')
drop_column= [' gdp_for_year ($) ', 'gdp_per_capita ($)', 'country-year']
filtered_df = filtered_df.drop(drop_column, axis=1)
#print(df)
#-----------------------------------------------------
numbers_suicide = filtered_df['suicides_no']
stat = np.round(numbers_suicide.describe(), 2)
median = np.median(numbers_suicide)
print(stat)
print(median)
#------------------------------------------------------------
# Create a box plot to visualize the statistics
# Create a bar graph to visualize the statistics
fig, ax = plt.subplots()
ax.bar(stat.index, stat.values)
ax.set_title('Descriptive statistics for suicides_no')
ax.set_xlabel('Statistic')
ax.set_ylabel('Value')
plt.show()
###################################################################################################################
values = merged_df.isna()
count = values.sum()
Australia_data = merged_df.loc[(merged_df['country'] == 'Australia')]
#print(usa_data)
#---------------------------------------------------
anxiety_disorders = Australia_data['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)']
stat = np.round(anxiety_disorders.describe(), 2)
median = np.median(stat)
print(stat)
print(median)
#------------------------------------------------------------
# Create a box plot to visualize the statistics
# Create a bar graph to visualize the statistics
fig, ax = plt.subplots()
ax.bar(stat.index, stat.values)
ax.set_title('Descriptive statistics for anxiety disorders(Australia)')
ax.set_xlabel('Statistic')
ax.set_ylabel('Value')
plt.show()



# Create a figure with multiple subplots
fig, axs = plt.subplots(6, 2, figsize=(20, 30))

# Plot 1: Top N average suicide rates by country
top_n = 15
top_countries = average_suicide_rates_by_country.head(top_n)
axs[0, 0].barh(top_countries.index, top_countries.values)
axs[0, 0].invert_yaxis()
axs[0, 0].set_title(f'Top {top_n} Average Suicide Rates by Country')
axs[0, 0].set_xlabel('Suicides per 100k Population')

# Plot 2: Average suicide rates by gender
axs[0, 1].bar(average_suicide_rates_by_gender.index, average_suicide_rates_by_gender.values)
axs[0, 1].set_title('Average Suicide Rates by Gender')
axs[0, 1].set_ylabel('Suicides per 100k Population')

# Plot 3: Average suicide rates by age
axs[1, 0].bar(average_suicide_rates_by_age.index, average_suicide_rates_by_age.values)
axs[1, 0].set_title('Average Suicide Rates by Age')
axs[1, 0].set_ylabel('Suicides per 100k Population')
axs[1, 0].set_xticklabels(average_suicide_rates_by_age.index, rotation=45, ha='right')

# Plot 4: Average suicide rates by generation
# For average_suicide_rates_by_generation_and_year
avg_suicide_rates_by_gen_and_year = average_suicide_rates_by_generation_and_year.reset_index()
avg_suicide_rates_by_gen_and_year['year'] = pd.to_numeric(avg_suicide_rates_by_gen_and_year['year'])
axs[1, 1].bar(average_suicide_rates_by_generation.index, average_suicide_rates_by_generation.values)
axs[1, 1].set_title('Average Suicide Rates by Generation')
axs[1, 1].set_ylabel('Suicides per 100k Population')
axs[1, 1].set_xticklabels(average_suicide_rates_by_generation.index, rotation=45, ha='right')

# Plot 5: Suicide rates over time
axs[2, 0].plot(average_suicide_rates_by_year.index, average_suicide_rates_by_year.values)
axs[2, 0].set_xlabel('Year')
axs[2, 0].set_ylabel('Average Suicide Rate per 100k Population')
axs[2, 0].set_title('Suicide Rates Over Time')

# Plot 6: Average suicide rates by country and year
top_n_year = 5
# Find the top 15 countries based on average suicide rates
top_n_countries = average_suicide_rates_by_country_and_year.groupby('country').mean().nlargest(top_n_year).index

# Filter average_suicide_rates_by_country_and_year to only include the top 15 countries
top_n_average_suicide_rates_by_country_and_year = average_suicide_rates_by_country_and_year[average_suicide_rates_by_country_and_year.index.get_level_values('country').isin(top_n_countries)]

# For average_suicide_rates_by_country_and_year (Top 15 countries)
top_n_average_suicide_rates_by_country_and_year = top_n_average_suicide_rates_by_country_and_year.reset_index()
top_n_average_suicide_rates_by_country_and_year['year'] = pd.to_numeric(top_n_average_suicide_rates_by_country_and_year['year'])
for country, data in top_n_average_suicide_rates_by_country_and_year.groupby('country'):
    axs[2, 1].plot(data['year'], data['suicides/100k pop'], label=country)
axs[2, 1].set_xlabel('Year')
axs[2, 1].set_ylabel('Suicides per 100k Population')
axs[2, 1].set_title('Average Suicide Rates by Country and Year (Top 5 Countries)')
axs[2, 1].legend(title='Country', bbox_to_anchor=(1, 1))

# Plot 7: Average suicide rates by gender and year
average_suicide_rates_by_gender_and_year.unstack().plot(ax=axs[3, 0])
axs[3, 0].set_ylabel('Suicides per 100k Population')
axs[3, 0].set_title('Average Suicide Rates by Gender and Year')
axs[3, 0].legend(title='Gender', loc='upper right')

# Plot 8: Average suicide rates by age and year
average_suicide_rates_by_age_and_year.unstack().plot(ax=axs[3, 1])
axs[3, 1].set_ylabel('Suicides per 100k Population')
axs[3, 1].set_title('Average Suicide Rates by Age and Year')
axs[3, 1].legend(title='Age Group', loc='upper right')

#Plot 9: Top N average anxiety disorders prevalence by country
top_n = 15
top_n_prevalence = average_prevalence_by_country.sort_values(ascending=False).head(top_n)
axs[4, 0].barh(top_n_prevalence.index, top_n_prevalence.values)
axs[4, 0].invert_yaxis()
axs[4, 0].set_xlabel('Anxiety Disorders Prevalence (%)')
axs[4, 0].set_title('Top 15 Average Anxiety Disorders Prevalence by Country')

#Plot 10: Anxiety disorders prevalence over time
axs[4, 1].plot(mean_prevalence.index, mean_prevalence.values, label='Mean Prevalence')
axs[4, 1].plot(median_prevalence.index, median_prevalence.values, label='Median Prevalence')
axs[4, 1].set_xlabel('Year')
axs[4, 1].set_ylabel('Prevalence of Anxiety Disorders (%)')
axs[4, 1].set_title('Anxiety Disorders Prevalence Over Time')
axs[4, 1].legend()

#Plot 11: Average GDP over time
axs[5, 0].bar(average_gdp.index, average_gdp.values, label='Average GDP')
axs[5, 0].set_xlabel('Year')
axs[5, 0].set_ylabel('GDP (USD)')
axs[5, 0].set_title('Average GDP Over Time')
axs[5, 0].legend()

#Plot 12: Unemployment rate over time
axs[5, 1].plot(mean_unemployment.index, mean_unemployment.values, label='Mean Unemployment Rate')
axs[5, 1].plot(median_unemployment.index, median_unemployment.values, label='Median Unemployment Rate')
axs[5, 1].set_xlabel('Year')
axs[5, 1].set_ylabel('Unemployment Rate (%)')
axs[5, 1].set_title('Unemployment Rate Over Time')
axs[5, 1].legend()

#Adjust layout and display the plots
fig.tight_layout()
plt.show()
df.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
country                   0
year                      0
sex                       0
age                       0
suicides_no               0
population                0
suicides/100k pop         0
country-year              0
HDI for year          19456
 gdp_for_year ($)         0
gdp_per_capita ($)        0
generation                0
dtype: int64




df2.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Entity                                                                            0
Code                                                                            690
Year                                                                              0
Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)      0
dtype: int64




df3.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
   Country Name  Country Code  1960  1961  1962  1963  1964  1965  1966  1967  1968  1969  1970  1971  1972  1973  1974  1975  1976  1977  1978  1979  1980  1981  1982  1983  1984  1985  1986  1987  1988  1989  1990  1991  1992  1993  1994  1995  1996  1997  1998  1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  2010  2011  2012  2013  2014  2015  2016  2017  2018  2019  2020
0             0             0     1     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0




df4.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
"LOCATION"      0
INDICATOR          0
SUBJECT            0
MEASURE            0
FREQUENCY          0
TIME               0
Value              0
Flag Codes       586
dtype: int64




df
-------------------------------------------------------------------------------------------------------------------------------------------------------
          country  year     sex          age  suicides_no  population  \
0         Albania  1987    male  15-24 years           21      312900   
1         Albania  1987    male  35-54 years           16      308000   
2         Albania  1987  female  15-24 years           14      289700   
3         Albania  1987    male    75+ years            1       21800   
4         Albania  1987    male  25-34 years            9      274300   
...           ...   ...     ...          ...          ...         ...   
27815  Uzbekistan  2014  female  35-54 years          107     3620833   
27816  Uzbekistan  2014  female    75+ years            9      348465   
27817  Uzbekistan  2014    male   5-14 years           60     2762158   
27818  Uzbekistan  2014  female   5-14 years           44     2631600   
27819  Uzbekistan  2014  female  55-74 years           21     1438935   

       suicides/100k pop    country-year  HDI for year  gdp_for_year ($)   \
0                   6.71     Albania1987           NaN      2,156,624,900   
1                   5.19     Albania1987           NaN      2,156,624,900   
2                   4.83     Albania1987           NaN      2,156,624,900   
3                   4.59     Albania1987           NaN      2,156,624,900   
4                   3.28     Albania1987           NaN      2,156,624,900   
...                  ...             ...           ...                ...   
27815               2.96  Uzbekistan2014         0.675     63,067,077,179   
27816               2.58  Uzbekistan2014         0.675     63,067,077,179   
27817               2.17  Uzbekistan2014         0.675     63,067,077,179   
27818               1.67  Uzbekistan2014         0.675     63,067,077,179   
27819               1.46  Uzbekistan2014         0.675     63,067,077,179   

       gdp_per_capita ($)       generation  
0                     796     Generation X  
1                     796           Silent  
2                     796     Generation X  
3                     796  G.I. Generation  
4                     796          Boomers  
...                   ...              ...  
27815                2309     Generation X  
27816                2309           Silent  
27817                2309     Generation Z  
27818                2309     Generation Z  
27819                2309          Boomers  

[27820 rows x 12 columns]




df2
-------------------------------------------------------------------------------------------------------------------------------------------------------
           Entity Code  Year  \
0     Afghanistan  AFG  1990   
1     Afghanistan  AFG  1991   
2     Afghanistan  AFG  1992   
3     Afghanistan  AFG  1993   
4     Afghanistan  AFG  1994   
...           ...  ...   ...   
6835     Zimbabwe  ZWE  2015   
6836     Zimbabwe  ZWE  2016   
6837     Zimbabwe  ZWE  2017   
6838     Zimbabwe  ZWE  2018   
6839     Zimbabwe  ZWE  2019   

      Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  
0                                                  4.84                             
1                                                  4.82                             
2                                                  4.80                             
3                                                  4.79                             
4                                                  4.78                             
...                                                 ...                             
6835                                               3.32                             
6836                                               3.32                             
6837                                               3.33                             
6838                                               3.32                             
6839                                               3.28                             

[6840 rows x 4 columns]




df3
-------------------------------------------------------------------------------------------------------------------------------------------------------
                       Country Name Country Code          1960          1961  \
0       Africa Eastern and Southern          AFE  1.931311e+10  1.972349e+10   
1        Africa Western and Central          AFW  1.040428e+10  1.112805e+10   
2                         Australia          AUS  1.860679e+10  1.968306e+10   
3                           Austria          AUT  6.592694e+09  7.311750e+09   
4                           Burundi          BDI  1.960000e+08  2.030000e+08   
..                              ...          ...           ...           ...   
115  St. Vincent and the Grenadines          VCT  1.306656e+07  1.399988e+07   
116                           World          WLD  1.390000e+12  1.440000e+12   
117                    South Africa          ZAF  7.575397e+09  7.972997e+09   
118                          Zambia          ZMB  7.130000e+08  6.962857e+08   
119                        Zimbabwe          ZWE  1.052990e+09  1.096647e+09   

             1962          1963          1964          1965          1966  \
0    2.149392e+10  2.573321e+10  2.352744e+10  2.681057e+10  2.915216e+10   
1    1.194335e+10  1.267652e+10  1.383858e+10  1.486247e+10  1.583285e+10   
2    1.992272e+10  2.153993e+10  2.380110e+10  2.597715e+10  2.730989e+10   
3    7.756110e+09  8.374175e+09  9.169984e+09  9.994071e+09  1.088768e+10   
4    2.135000e+08  2.327500e+08  2.607500e+08  1.589950e+08  1.654446e+08   
..            ...           ...           ...           ...           ...   
115  1.452488e+07  1.370822e+07  1.475821e+07  1.510821e+07  1.609987e+07   
116  1.550000e+12  1.670000e+12  1.820000e+12  1.990000e+12  2.160000e+12   
117  8.497997e+09  9.423396e+09  1.037400e+10  1.133440e+10  1.235500e+10   
118  6.931429e+08  7.187143e+08  8.394286e+08  1.082857e+09  1.264286e+09   
119  1.117602e+09  1.159512e+09  1.217138e+09  1.311436e+09  1.281750e+09   

             1967  ...          2011          2012          2013  \
0    3.017317e+10  ...  9.430000e+11  9.510000e+11  9.640000e+11   
1    1.442643e+10  ...  6.710000e+11  7.280000e+11  8.210000e+11   
2    3.044462e+10  ...  1.400000e+12  1.550000e+12  1.580000e+12   
3    1.157943e+10  ...  4.310000e+11  4.090000e+11  4.300000e+11   
4    1.782971e+08  ...  2.235821e+09  2.333308e+09  2.451625e+09   
..            ...  ...           ...           ...           ...   
115  1.583518e+07  ...  6.761296e+08  6.929333e+08  7.212074e+08   
116  2.290000e+12  ...  7.370000e+13  7.530000e+13  7.740000e+13   
117  1.377739e+10  ...  4.580000e+11  4.340000e+11  4.010000e+11   
118  1.368000e+09  ...  2.345952e+10  2.550306e+10  2.803724e+10   
119  1.397002e+09  ...  1.410192e+10  1.711485e+10  1.909102e+10   

             2014          2015          2016          2017          2018  \
0    9.850000e+11  9.200000e+11  8.730000e+11  9.850000e+11  1.010000e+12   
1    8.650000e+11  7.610000e+11  6.910000e+11  6.840000e+11  7.420000e+11   
2    1.470000e+12  1.350000e+12  1.210000e+12  1.330000e+12  1.430000e+12   
3    4.420000e+11  3.820000e+11  3.960000e+11  4.160000e+11  4.550000e+11   
4    2.705783e+09  3.104395e+09  2.732809e+09  2.748180e+09  2.668496e+09   
..            ...           ...           ...           ...           ...   
115  7.277148e+08  7.554000e+08  7.744296e+08  7.921778e+08  8.113000e+08   
116  7.960000e+13  7.510000e+13  7.630000e+13  8.120000e+13  8.630000e+13   
117  3.810000e+11  3.470000e+11  3.240000e+11  3.810000e+11  4.050000e+11   
118  2.714102e+10  2.125122e+10  2.095841e+10  2.587360e+10  2.631159e+10   
119  1.949552e+10  1.996312e+10  2.054868e+10  1.758489e+10  1.811554e+10   

             2019          2020  
0    1.010000e+12  9.210000e+11  
1    7.950000e+11  7.850000e+11  
2    1.390000e+12  1.330000e+12  
3    4.450000e+11  4.330000e+11  
4    2.631434e+09  2.841786e+09  
..            ...           ...  
115  8.250407e+08  8.074741e+08  
116  8.760000e+13  8.470000e+13  
117  3.880000e+11  3.350000e+11  
118  2.330867e+10  1.811063e+10  
119  1.928429e+10  1.805117e+10  

[120 rows x 63 columns]




df4
-------------------------------------------------------------------------------------------------------------------------------------------------------
    "LOCATION" INDICATOR SUBJECT MEASURE FREQUENCY  TIME      Value  \
0             AUS       HUR     TOT   PC_LF         A  2000   6.285546   
1             AUS       HUR     TOT   PC_LF         A  2001   6.742173   
2             AUS       HUR     TOT   PC_LF         A  2002   6.368911   
3             AUS       HUR     TOT   PC_LF         A  2003   5.928420   
4             AUS       HUR     TOT   PC_LF         A  2004   5.396734   
..            ...       ...     ...     ...       ...   ...        ...   
626           CRI       HUR     TOT   PC_LF         A  2011  10.298480   
627           CRI       HUR     TOT   PC_LF         A  2012  10.171750   
628           CRI       HUR     TOT   PC_LF         A  2013   9.386163   
629           CRI       HUR     TOT   PC_LF         A  2014   9.617385   
630           CRI       HUR     TOT   PC_LF         A  2015   9.612973   

    Flag Codes  
0          NaN  
1          NaN  
2          NaN  
3          NaN  
4          NaN  
..         ...  
626        NaN  
627        NaN  
628        NaN  
629        NaN  
630        NaN  

[631 rows x 8 columns]




filtered_df.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
country               0
year                  0
sex                   0
age                   0
suicides_no           0
population            0
suicides/100k pop     0
country-year          0
HDI for year          0
 gdp_for_year ($)     0
gdp_per_capita ($)    0
generation            0
dtype: int64




df2_filtered.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Entity                                                                          0
Year                                                                            0
Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)    0
dtype: int64




df3_filtered.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
   Country Name  Country Code  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009  2010  2011  2012  2013  2014  2015
0             0             0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0     0




df4_filtered.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
LOCATION    0
TIME        0
Value       0
dtype: int64




filtered_df
-------------------------------------------------------------------------------------------------------------------------------------------------------
          country  year     sex          age  suicides_no  population  \
132       Albania  2000    male  25-34 years           17      232000   
133       Albania  2000    male  55-74 years           10      177400   
134       Albania  2000  female    75+ years            2       37800   
135       Albania  2000    male    75+ years            1       24900   
136       Albania  2000  female  15-24 years            6      263900   
...           ...   ...     ...          ...          ...         ...   
27815  Uzbekistan  2014  female  35-54 years          107     3620833   
27816  Uzbekistan  2014  female    75+ years            9      348465   
27817  Uzbekistan  2014    male   5-14 years           60     2762158   
27818  Uzbekistan  2014  female   5-14 years           44     2631600   
27819  Uzbekistan  2014  female  55-74 years           21     1438935   

       suicides/100k pop    country-year  HDI for year   gdp_for_year ($)   \
132                 7.33     Albania2000         0.656        3.632044e+09   
133                 5.64     Albania2000         0.656        3.632044e+09   
134                 5.29     Albania2000         0.656        3.632044e+09   
135                 4.02     Albania2000         0.656        3.632044e+09   
136                 2.27     Albania2000         0.656        3.632044e+09   
...                  ...             ...           ...                 ...   
27815               2.96  Uzbekistan2014         0.675        6.306708e+10   
27816               2.58  Uzbekistan2014         0.675        6.306708e+10   
27817               2.17  Uzbekistan2014         0.675        6.306708e+10   
27818               1.67  Uzbekistan2014         0.675        6.306708e+10   
27819               1.46  Uzbekistan2014         0.675        6.306708e+10   

       gdp_per_capita ($)       generation  
132                  1299     Generation X  
133                  1299           Silent  
134                  1299  G.I. Generation  
135                  1299  G.I. Generation  
136                  1299     Generation X  
...                   ...              ...  
27815                2309     Generation X  
27816                2309           Silent  
27817                2309     Generation Z  
27818                2309     Generation Z  
27819                2309          Boomers  

[16008 rows x 12 columns]




df2_filtered
-------------------------------------------------------------------------------------------------------------------------------------------------------
           Entity  Year  \
10    Afghanistan  2000   
11    Afghanistan  2001   
12    Afghanistan  2002   
13    Afghanistan  2003   
14    Afghanistan  2004   
...           ...   ...   
6831     Zimbabwe  2011   
6832     Zimbabwe  2012   
6833     Zimbabwe  2013   
6834     Zimbabwe  2014   
6835     Zimbabwe  2015   

      Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  
10                                                 4.79                             
11                                                 4.79                             
12                                                 4.79                             
13                                                 4.79                             
14                                                 4.79                             
...                                                 ...                             
6831                                               3.29                             
6832                                               3.30                             
6833                                               3.30                             
6834                                               3.31                             
6835                                               3.32                             

[3648 rows x 3 columns]




df3_filtered
-------------------------------------------------------------------------------------------------------------------------------------------------------
                       Country Name Country Code          2000          2001  \
0       Africa Eastern and Southern          AFE  2.840000e+11  2.590000e+11   
1        Africa Western and Central          AFW  1.400000e+11  1.480000e+11   
2                         Australia          AUS  4.160000e+11  3.790000e+11   
3                           Austria          AUT  1.970000e+11  1.970000e+11   
4                           Burundi          BDI  8.704861e+08  8.767947e+08   
..                              ...          ...           ...           ...   
115  St. Vincent and the Grenadines          VCT  3.962630e+08  4.300407e+08   
116                           World          WLD  3.380000e+13  3.360000e+13   
117                    South Africa          ZAF  1.520000e+11  1.350000e+11   
118                          Zambia          ZMB  3.600683e+09  4.094481e+09   
119                        Zimbabwe          ZWE  6.689958e+09  6.777385e+09   

             2002          2003          2004          2005          2006  \
0    2.650000e+11  3.530000e+11  4.390000e+11  5.120000e+11  5.760000e+11   
1    1.770000e+11  2.050000e+11  2.540000e+11  3.110000e+11  3.930000e+11   
2    3.950000e+11  4.670000e+11  6.140000e+11  6.950000e+11  7.480000e+11   
3    2.130000e+11  2.620000e+11  3.010000e+11  3.160000e+11  3.360000e+11   
4    8.253945e+08  7.846544e+08  9.152573e+08  1.117113e+09  1.273375e+09   
..            ...           ...           ...           ...           ...   
115  4.618852e+08  4.818074e+08  5.219741e+08  5.507296e+08  6.109296e+08   
116  3.490000e+13  3.910000e+13  4.410000e+13  4.780000e+13  5.180000e+13   
117  1.290000e+11  1.970000e+11  2.560000e+11  2.890000e+11  3.040000e+11   
118  4.193846e+09  4.901840e+09  6.221078e+09  8.331870e+09  1.275686e+10   
119  6.342116e+09  5.727592e+09  5.805598e+09  5.755215e+09  5.443896e+09   

             2007          2008          2009          2010          2011  \
0    6.610000e+11  7.080000e+11  7.130000e+11  8.470000e+11  9.430000e+11   
1    4.620000e+11  5.660000e+11  5.070000e+11  5.920000e+11  6.710000e+11   
2    8.540000e+11  1.060000e+12  9.280000e+11  1.150000e+12  1.400000e+12   
3    3.890000e+11  4.300000e+11  4.000000e+11  3.920000e+11  4.310000e+11   
4    1.356199e+09  1.611836e+09  1.781455e+09  2.032135e+09  2.235821e+09   
..            ...           ...           ...           ...           ...   
115  6.844444e+08  6.954296e+08  6.749222e+08  6.812259e+08  6.761296e+08   
116  5.830000e+13  6.400000e+13  6.070000e+13  6.650000e+13  7.370000e+13   
117  3.330000e+11  3.160000e+11  3.300000e+11  4.170000e+11  4.580000e+11   
118  1.405696e+10  1.791086e+10  1.532834e+10  2.026556e+10  2.345952e+10   
119  5.291950e+09  4.415703e+09  9.665793e+09  1.204166e+10  1.410192e+10   

             2012          2013          2014          2015  
0    9.510000e+11  9.640000e+11  9.850000e+11  9.200000e+11  
1    7.280000e+11  8.210000e+11  8.650000e+11  7.610000e+11  
2    1.550000e+12  1.580000e+12  1.470000e+12  1.350000e+12  
3    4.090000e+11  4.300000e+11  4.420000e+11  3.820000e+11  
4    2.333308e+09  2.451625e+09  2.705783e+09  3.104395e+09  
..            ...           ...           ...           ...  
115  6.929333e+08  7.212074e+08  7.277148e+08  7.554000e+08  
116  7.530000e+13  7.740000e+13  7.960000e+13  7.510000e+13  
117  4.340000e+11  4.010000e+11  3.810000e+11  3.470000e+11  
118  2.550306e+10  2.803724e+10  2.714102e+10  2.125122e+10  
119  1.711485e+10  1.909102e+10  1.949552e+10  1.996312e+10  

[120 rows x 18 columns]




df4_filtered
-------------------------------------------------------------------------------------------------------------------------------------------------------
    LOCATION  TIME      Value
0        AUS  2000   6.285546
1        AUS  2001   6.742173
2        AUS  2002   6.368911
3        AUS  2003   5.928420
4        AUS  2004   5.396734
..       ...   ...        ...
626      CRI  2011  10.298480
627      CRI  2012  10.171750
628      CRI  2013   9.386163
629      CRI  2014   9.617385
630      CRI  2015   9.612973

[631 rows x 3 columns]




average_suicide_rates_by_country = filtered_df.groupby('country')['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
country
Lithuania              38.015208
Republic of Korea      35.543646
Russian Federation     31.338229
Guyana                 30.191667
Sri Lanka              30.104000
                         ...    
Antigua and Barbuda     0.874405
Barbados                0.834881
Oman                    0.736111
Jamaica                 0.688583
Kiribati                0.000000
Name: suicides/100k pop, Length: 97, dtype: float64




average_suicide_rates_by_country_and_year = filtered_df.groupby(['year','country'])['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
year  country            
2000  Albania                 2.558333
      Antigua and Barbuda     3.330833
      Argentina              10.949167
      Armenia                 2.858333
      Aruba                  25.444167
                               ...    
2015  Turkmenistan            2.373333
      Ukraine                20.393333
      United Kingdom          7.228333
      United States          14.617500
      Uruguay                22.501667
Name: suicides/100k pop, Length: 1334, dtype: float64




average_suicide_rates_by_gender = filtered_df.groupby('sex')['suicides/100k pop'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
sex
female     4.901246
male      19.299390
Name: suicides/100k pop, dtype: float64




average_suicide_rates_by_gender_and_year = filtered_df.groupby(['year','sex'])['suicides/100k pop'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
year  sex   
2000  female     5.783004
      male      22.099651
2001  female     5.491932
      male      21.546345
2002  female     5.606105
      male      21.966996
2003  female     5.302384
      male      21.107655
2004  female     5.054127
      male      19.909762
2005  female     5.007599
      male      19.129286
2006  female     4.769059
      male      19.085863
2007  female     4.994012
      male      19.056667
2008  female     4.979412
      male      18.835961
2009  female     4.528240
      male      18.273333
2010  female     4.406117
      male      18.025682
2011  female     4.344322
      male      17.686376
2012  female     4.521626
      male      18.321811
2013  female     4.366208
      male      17.849708
2014  female     4.458803
      male      17.564124
2015  female     4.653468
      male      17.534677
Name: suicides/100k pop, dtype: float64




average_suicide_rates_by_age = filtered_df.groupby('age')['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
age
75+ years      22.080315
55-74 years    15.342031
35-54 years    14.420795
25-34 years    11.625240
15-24 years     8.504663
5-14 years      0.628864
Name: suicides/100k pop, dtype: float64




average_suicide_rates_by_age_and_year = filtered_df.groupby(['year','age'])['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
year  age        
2000  15-24 years     9.442733
      25-34 years    13.756919
      35-54 years    16.443372
      5-14 years      0.532442
      55-74 years    17.531279
                       ...    
2015  25-34 years    10.111774
      35-54 years    12.555645
      5-14 years      0.704677
      55-74 years    14.451774
      75+ years      20.967339
Name: suicides/100k pop, Length: 96, dtype: float64




correlations = filtered_df[['suicides/100k pop', 'gdp_per_capita ($)', ' gdp_for_year ($) ']].corr()
-------------------------------------------------------------------------------------------------------------------------------------------------------
                    suicides/100k pop  gdp_per_capita ($)   gdp_for_year ($) 
suicides/100k pop            1.000000           -0.010388            0.026922
gdp_per_capita ($)          -0.010388            1.000000            0.271639
 gdp_for_year ($)            0.026922            0.271639            1.000000




average_suicide_rates_by_generation = filtered_df.groupby('generation')['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
generation
G.I. Generation    25.941221
Silent             19.236765
Boomers            14.789173
Generation X       12.256906
Millenials          6.720002
Generation Z        0.642299
Name: suicides/100k pop, dtype: float64




average_suicide_rates_by_generation_and_year = filtered_df.groupby(['year', 'generation'])['suicides/100k pop'].mean().sort_values(ascending=False)
-------------------------------------------------------------------------------------------------------------------------------------------------------
year  generation     
2000  Boomers            16.443372
      G.I. Generation    25.941221
      Generation X       11.599826
      Millenials          0.532442
      Silent             17.531279
                           ...    
2015  Boomers            14.451774
      Generation X       12.555645
      Generation Z        0.704677
      Millenials          8.942500
      Silent             20.967339
Name: suicides/100k pop, Length: 73, dtype: float64




average_suicide_rates_by_year = filtered_df.groupby('year')['suicides/100k pop'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
year
2000    13.941328
2001    13.519138
2002    13.786550
2003    13.205019
2004    12.481944
2005    12.068442
2006    11.927461
2007    12.025339
2008    11.907686
2009    11.400787
2010    11.215900
2011    11.015349
2012    11.421718
2013    11.107958
2014    11.011464
2015    11.094073
Name: suicides/100k pop, dtype: float64




prevalence_by_country = df2_filtered.groupby(['Entity', 'Year'])['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Entity       Year
Afghanistan  2000    4.79
             2001    4.79
             2002    4.79
             2003    4.79
             2004    4.79
                     ... 
Zimbabwe     2011    3.29
             2012    3.30
             2013    3.30
             2014    3.31
             2015    3.32
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), Length: 3648, dtype: float64




average_prevalence_by_country = df2_filtered.groupby('Entity')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Entity
Afghanistan                       4.841875
African Region (WHO)              3.537500
Albania                           3.990625
Algeria                           4.918125
American Samoa                    4.193750
                                    ...   
World Bank Lower Middle Income    3.293125
World Bank Upper Middle Income    4.211875
Yemen                             4.914375
Zambia                            3.881250
Zimbabwe                          3.303125
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), Length: 228, dtype: float64




mean_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Year
2000    4.305789
2001    4.309298
2002    4.311798
2003    4.313684
2004    4.315526
2005    4.317544
2006    4.323596
2007    4.334781
2008    4.348816
2009    4.361491
2010    4.369123
2011    4.373772
2012    4.378772
2013    4.383860
2014    4.389649
2015    4.394605
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), dtype: float64




median_prevalence = df2_filtered.groupby('Year')['Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)'].median()
-------------------------------------------------------------------------------------------------------------------------------------------------------
Year
2000    4.055
2001    4.055
2002    4.055
2003    4.055
2004    4.055
2005    4.055
2006    4.070
2007    4.090
2008    4.115
2009    4.130
2010    4.140
2011    4.150
2012    4.160
2013    4.170
2014    4.180
2015    4.180
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), dtype: float64




total_gdp = df3_filtered.iloc[:, 2:].sum(axis=0)
-------------------------------------------------------------------------------------------------------------------------------------------------------
2000    2.160246e+14
2001    2.151838e+14
2002    2.233870e+14
2003    2.505006e+14
2004    2.831420e+14
2005    3.095809e+14
2006    3.389768e+14
2007    3.857718e+14
2008    4.279962e+14
2009    4.111343e+14
2010    4.573194e+14
2011    5.119564e+14
2012    5.284604e+14
2013    5.477950e+14
2014    5.653896e+14
2015    5.399171e+14
dtype: float64




average_gdp = df3_filtered.iloc[:, 2:].mean(axis=0)
-------------------------------------------------------------------------------------------------------------------------------------------------------
2000    1.800205e+12
2001    1.793198e+12
2002    1.861558e+12
2003    2.087505e+12
2004    2.359517e+12
2005    2.579841e+12
2006    2.824807e+12
2007    3.214765e+12
2008    3.566635e+12
2009    3.426120e+12
2010    3.810995e+12
2011    4.266303e+12
2012    4.403837e+12
2013    4.564958e+12
2014    4.711580e+12
2015    4.499309e+12
dtype: float64




mean_unemployment = df4_filtered.groupby('TIME')['Value'].mean()
-------------------------------------------------------------------------------------------------------------------------------------------------------
TIME
2000    7.901248
2001    7.765694
2002    7.832388
2003    7.879029
2004    7.936171
2005    7.560420
2006    6.812503
2007    6.196204
2008    6.307551
2009    8.764485
2010    9.282159
2011    8.937410
2012    9.230890
2013    9.234676
2014    8.744107
2015    8.165753
Name: Value, dtype: float64




median_unemployment = df4_filtered.groupby('TIME')['Value'].median()
-------------------------------------------------------------------------------------------------------------------------------------------------------
TIME
2000    6.779167
2001    6.666920
2002    6.886809
2003    7.575000
2004    7.375000
2005    7.783333
2006    6.466667
2007    5.966615
2008    6.239756
2009    8.219090
2010    8.375000
2011    8.045834
2012    8.020834
2013    8.200000
2014    7.456700
2015    6.915025
Name: Value, dtype: float64




<ipython-input-31-29986ce04bff>:336: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df3_filtered.rename(columns={'Country Name': 'country', 'Country Code': 'country_code'}, inplace=True)
merged_df = filtered_df.merge(df2_filtered, on=['country', 'year'], how='left')
-------------------------------------------------------------------------------------------------------------------------------------------------------
          country  year     sex          age  suicides_no  population  \
0         Albania  2000    male  25-34 years           17      232000   
1         Albania  2000    male  55-74 years           10      177400   
2         Albania  2000  female    75+ years            2       37800   
3         Albania  2000    male    75+ years            1       24900   
4         Albania  2000  female  15-24 years            6      263900   
...           ...   ...     ...          ...          ...         ...   
16003  Uzbekistan  2014  female  35-54 years          107     3620833   
16004  Uzbekistan  2014  female    75+ years            9      348465   
16005  Uzbekistan  2014    male   5-14 years           60     2762158   
16006  Uzbekistan  2014  female   5-14 years           44     2631600   
16007  Uzbekistan  2014  female  55-74 years           21     1438935   

       suicides/100k pop    country-year  HDI for year   gdp_for_year ($)   \
0                   7.33     Albania2000         0.656        3.632044e+09   
1                   5.64     Albania2000         0.656        3.632044e+09   
2                   5.29     Albania2000         0.656        3.632044e+09   
3                   4.02     Albania2000         0.656        3.632044e+09   
4                   2.27     Albania2000         0.656        3.632044e+09   
...                  ...             ...           ...                 ...   
16003               2.96  Uzbekistan2014         0.675        6.306708e+10   
16004               2.58  Uzbekistan2014         0.675        6.306708e+10   
16005               2.17  Uzbekistan2014         0.675        6.306708e+10   
16006               1.67  Uzbekistan2014         0.675        6.306708e+10   
16007               1.46  Uzbekistan2014         0.675        6.306708e+10   

       gdp_per_capita ($)       generation  \
0                    1299     Generation X   
1                    1299           Silent   
2                    1299  G.I. Generation   
3                    1299  G.I. Generation   
4                    1299     Generation X   
...                   ...              ...   
16003                2309     Generation X   
16004                2309           Silent   
16005                2309     Generation Z   
16006                2309     Generation Z   
16007                2309          Boomers   

       Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  
0                                                   3.93                             
1                                                   3.93                             
2                                                   3.93                             
3                                                   3.93                             
4                                                   3.93                             
...                                                  ...                             
16003                                               2.12                             
16004                                               2.12                             
16005                                               2.12                             
16006                                               2.12                             
16007                                               2.12                             

[16008 rows x 13 columns]




merged_df = merged_df.merge(df3_long, on=['country', 'year'], how='left')
-------------------------------------------------------------------------------------------------------------------------------------------------------
          country  year     sex          age  suicides_no  population  \
0         Albania  2000    male  25-34 years           17      232000   
1         Albania  2000    male  55-74 years           10      177400   
2         Albania  2000  female    75+ years            2       37800   
3         Albania  2000    male    75+ years            1       24900   
4         Albania  2000  female  15-24 years            6      263900   
...           ...   ...     ...          ...          ...         ...   
16003  Uzbekistan  2014  female  35-54 years          107     3620833   
16004  Uzbekistan  2014  female    75+ years            9      348465   
16005  Uzbekistan  2014    male   5-14 years           60     2762158   
16006  Uzbekistan  2014  female   5-14 years           44     2631600   
16007  Uzbekistan  2014  female  55-74 years           21     1438935   

       suicides/100k pop    country-year  HDI for year   gdp_for_year ($)   \
0                   7.33     Albania2000         0.656        3.632044e+09   
1                   5.64     Albania2000         0.656        3.632044e+09   
2                   5.29     Albania2000         0.656        3.632044e+09   
3                   4.02     Albania2000         0.656        3.632044e+09   
4                   2.27     Albania2000         0.656        3.632044e+09   
...                  ...             ...           ...                 ...   
16003               2.96  Uzbekistan2014         0.675        6.306708e+10   
16004               2.58  Uzbekistan2014         0.675        6.306708e+10   
16005               2.17  Uzbekistan2014         0.675        6.306708e+10   
16006               1.67  Uzbekistan2014         0.675        6.306708e+10   
16007               1.46  Uzbekistan2014         0.675        6.306708e+10   

       gdp_per_capita ($)       generation  \
0                    1299     Generation X   
1                    1299           Silent   
2                    1299  G.I. Generation   
3                    1299  G.I. Generation   
4                    1299     Generation X   
...                   ...              ...   
16003                2309     Generation X   
16004                2309           Silent   
16005                2309     Generation Z   
16006                2309     Generation Z   
16007                2309          Boomers   

       Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  \
0                                                   3.93                              
1                                                   3.93                              
2                                                   3.93                              
3                                                   3.93                              
4                                                   3.93                              
...                                                  ...                              
16003                                               2.12                              
16004                                               2.12                              
16005                                               2.12                              
16006                                               2.12                              
16007                                               2.12                              

      country_code  gdp  
0              NaN  NaN  
1              NaN  NaN  
2              NaN  NaN  
3              NaN  NaN  
4              NaN  NaN  
...            ...  ...  
16003          NaN  NaN  
16004          NaN  NaN  
16005          NaN  NaN  
16006          NaN  NaN  
16007          NaN  NaN  

[16008 rows x 15 columns]




merged_df = merged_df.merge(df4_filtered, on=['country', 'year'], how='left')
-------------------------------------------------------------------------------------------------------------------------------------------------------
             country  year     sex          age  suicides_no  population  \
804        Australia  2000    male  25-34 years          466     1430700   
805        Australia  2000    male    75+ years          115      416077   
806        Australia  2000    male  35-54 years          745     2769752   
807        Australia  2000    male  15-24 years          271     1333011   
808        Australia  2000    male  55-74 years          281     1522620   
...              ...   ...     ...          ...          ...         ...   
15679  United States  2015  female  25-34 years         1444    21555712   
15680  United States  2015  female  15-24 years         1132    21633813   
15681  United States  2015  female    75+ years          540    11778666   
15682  United States  2015    male   5-14 years          255    21273987   
15683  United States  2015  female   5-14 years          158    20342901   

       suicides/100k pop   gdp_for_year ($)   gdp_per_capita ($)  \
804                32.57        4.150342e+11               23219   
805                27.64        4.150342e+11               23219   
806                26.90        4.150342e+11               23219   
807                20.33        4.150342e+11               23219   
808                18.46        4.150342e+11               23219   
...                  ...                 ...                 ...   
15679               6.70        1.812071e+13               60387   
15680               5.23        1.812071e+13               60387   
15681               4.58        1.812071e+13               60387   
15682               1.20        1.812071e+13               60387   
15683               0.78        1.812071e+13               60387   

            generation  \
804       Generation X   
805    G.I. Generation   
806            Boomers   
807       Generation X   
808             Silent   
...                ...   
15679       Millenials   
15680       Millenials   
15681           Silent   
15682     Generation Z   
15683     Generation Z   

       Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  \
804                                                 5.61                              
805                                                 5.61                              
806                                                 5.61                              
807                                                 5.61                              
808                                                 5.61                              
...                                                  ...                              
15679                                               5.61                              
15680                                               5.61                              
15681                                               5.61                              
15682                                               5.61                              
15683                                               5.61                              

          value  
804    6.285546  
805    6.285546  
806    6.285546  
807    6.285546  
808    6.285546  
...         ...  
15679  5.291667  
15680  5.291667  
15681  5.291667  
15682  5.291667  
15683  5.291667  

[4128 rows x 12 columns]




merged_df.isnull().sum()
-------------------------------------------------------------------------------------------------------------------------------------------------------
country                                                                         0
year                                                                            0
sex                                                                             0
age                                                                             0
suicides_no                                                                     0
population                                                                      0
suicides/100k pop                                                               0
 gdp_for_year ($)                                                               0
gdp_per_capita ($)                                                              0
generation                                                                      0
Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)    0
value                                                                           0
dtype: int64




merged_df.describe(include='all')
-------------------------------------------------------------------------------------------------------------------------------------------------------
       country         year   sex          age   suicides_no    population  \
count     4128  4128.000000  4128         4128   4128.000000  4.128000e+03   
unique      24          NaN     2            6           NaN           NaN   
top      Italy          NaN  male  25-34 years           NaN           NaN   
freq       192          NaN  2064          688           NaN           NaN   
mean       NaN  2007.720930   NaN          NaN    424.522771  3.507311e+06   
std        NaN     4.558069   NaN          NaN   1162.719207  5.880734e+06   
min        NaN  2000.000000   NaN          NaN      0.000000  6.532000e+03   
25%        NaN  2004.000000   NaN          NaN     17.000000  4.877772e+05   
50%        NaN  2008.000000   NaN          NaN     88.000000  1.277342e+06   
75%        NaN  2012.000000   NaN          NaN    290.000000  3.959502e+06   
max        NaN  2015.000000   NaN          NaN  11767.000000  4.380521e+07   

        suicides/100k pop   gdp_for_year ($)   gdp_per_capita ($)  generation  \
count         4128.000000        4.128000e+03         4128.000000        4128   
unique                NaN                 NaN                 NaN           6   
top                   NaN                 NaN                 NaN  Millenials   
freq                  NaN                 NaN                 NaN        1156   
mean            11.867384        1.563796e+12        40700.299419         NaN   
std             13.023472        3.035203e+12        23309.800208         NaN   
min              0.000000        1.131644e+10         4866.000000         NaN   
25%              2.340000        2.382038e+11        24759.000000         NaN   
50%              7.670000        4.880970e+11        40347.500000         NaN   
75%             17.502500        1.464961e+12        51772.000000         NaN   
max             99.840000        1.812071e+13       126352.000000         NaN   

        Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent)  \
count                                         4128.000000                              
unique                                                NaN                              
top                                                   NaN                              
freq                                                  NaN                              
mean                                             5.677878                              
std                                              1.341483                              
min                                              2.560000                              
25%                                              4.730000                              
50%                                              5.830000                              
75%                                              6.482500                              
max                                              8.780000                              

              value  
count   4128.000000  
unique          NaN  
top             NaN  
freq            NaN  
mean       7.641546  
std        4.035549  
min        1.900000  
25%        5.031250  
50%        6.870833  
75%        9.008333  
max       27.825000  




grouped_data = merged_df.groupby(['country', 'year']).agg
-------------------------------------------------------------------------------------------------------------------------------------------------------
           country  year  suicides_no  population   gdp_for_year ($)   \
0        Australia  2000         2391    17874410        4.150342e+11   
1        Australia  2001         2458    18130883        3.782151e+11   
2        Australia  2002         2319    18370058        3.944867e+11   
3        Australia  2003         2156    18608029        4.662947e+11   
4        Australia  2004         2114    18854551        6.119043e+11   
..             ...   ...          ...         ...                 ...   
339  United States  2011        39508   290313825        1.551793e+13   
340  United States  2012        40596   292827128        1.615526e+13   
341  United States  2013        41143   295322862        1.669152e+13   
342  United States  2014        42769   297749735        1.742761e+13   
343  United States  2015        44189   300078511        1.812071e+13   

     gdp_per_capita ($)  Anxiety Prevalence (%)  Unemployment Rate(%)  \
0                 23219                    5.61              6.285546   
1                 20860                    5.66              6.742173   
2                 21474                    5.78              6.368911   
3                 25059                    5.92              5.928420   
4                 32454                    6.04              5.396734   
..                  ...                     ...                   ...   
339               53452                    6.22              8.950000   
340               55170                    6.03              8.066667   
341               56520                    5.84              7.375000   
342               58531                    5.68              6.166667   
343               60387                    5.61              5.291667   

     suicides/100k pop  
0            13.376665  
1            13.556979  
2            12.623803  
3            11.586396  
4            11.212147  
..                 ...  
339          13.608722  
340          13.863470  
341          13.931532  
342          14.364077  
343          14.725813  

[344 rows x 9 columns]




grouped_data.describe(include='all')
-------------------------------------------------------------------------------------------------------------------------------------------------------
       country         year   suicides_no    population   gdp_for_year ($)   \
count      344   344.000000    344.000000  3.440000e+02        3.440000e+02   
unique      24          NaN           NaN           NaN                 NaN   
top      Italy          NaN           NaN           NaN                 NaN   
freq        16          NaN           NaN           NaN                 NaN   
mean       NaN  2007.720930   5094.273256  4.208773e+07        1.563796e+12   
std        NaN     4.564155   9151.842382  6.224772e+07        3.039256e+12   
min        NaN  2000.000000     26.000000  2.683300e+05        1.131644e+10   
25%        NaN  2004.000000    905.750000  7.858817e+06        2.382038e+11   
50%        NaN  2008.000000   1844.500000  1.542053e+07        4.880970e+11   
75%        NaN  2012.000000   3980.750000  5.648715e+07        1.464961e+12   
max        NaN  2015.000000  44189.000000  3.000785e+08        1.812071e+13   

        gdp_per_capita ($)  Anxiety Prevalence (%)  Unemployment Rate(%)  \
count           344.000000              344.000000            344.000000   
unique                 NaN                     NaN                   NaN   
top                    NaN                     NaN                   NaN   
freq                   NaN                     NaN                   NaN   
mean          40700.299419                5.677878              7.641546   
std           23340.927117                1.343274              4.040938   
min            4866.000000                2.560000              1.900000   
25%           24759.000000                4.730000              5.031250   
50%           40347.500000                5.830000              6.870833   
75%           51772.000000                6.482500              9.008333   
max          126352.000000                8.780000             27.825000   

        suicides/100k pop  
count          344.000000  
unique                NaN  
top                   NaN  
freq                  NaN  
mean            12.036260  
std              5.413287  
min              1.591815  
25%              7.764163  
50%             11.738373  
75%             14.491590  
max             26.480336  




No description has been provided for this image
14068750000000.0
14450000000000.0
2398363700004.65
3650000000000.0
No description has been provided for this image
count    16.00
mean      6.48
std       0.52
min       5.61
25%       6.17
50%       6.49
75%       6.82
max       7.31
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), dtype: float64
6.485
No description has been provided for this image
count    16008.00
mean       237.05
std        884.03
min          0.00
25%          2.00
50%         23.00
75%        120.00
max      21262.00
Name: suicides_no, dtype: float64
23.0
No description has been provided for this image
count    180.00
mean       6.01
std        0.18
min        5.61
25%        5.92
50%        6.08
75%        6.14
max        6.16
Name: Prevalence - Anxiety disorders - Sex: Both - Age: Age-standardized (Percent), dtype: float64
6.045
No description has been provided for this image
<ipython-input-31-29986ce04bff>:611: UserWarning: FixedFormatter should only be used together with FixedLocator
  axs[1, 0].set_xticklabels(average_suicide_rates_by_age.index, rotation=45, ha='right')
<ipython-input-31-29986ce04bff>:620: UserWarning: FixedFormatter should only be used together with FixedLocator
  axs[1, 1].set_xticklabels(average_suicide_rates_by_generation.index, rotation=45, ha='right')
No description has been provided for this image
In [ ]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from statsmodels.tsa.arima.model import ARIMA

# Assuming your data is in a pandas DataFrame called "data"
# Calculate correlation matrix


# Display correlation matrix
import pandas as pd
import scipy.stats
import matplotlib.pyplot as plt


# ------------------------------
df = pd.read_csv("transformed_data.csv")
# ------------------------------

def predict_future(df, independent_var):
    # Group the data by year and calculate the mean suicide rate for the given independent variable
    df_yearly = df.groupby('year')[independent_var].mean().reset_index()

    # Set the year as the index
    df_yearly.set_index('year', inplace=True)

    # Determine the order of the ARIMA model (p, d, q)
    # You may need to experiment with different values or use an automated approach
    p, d, q = 1, 1, 1

    # Fit the ARIMA model
    model = ARIMA(df_yearly, order=(p, d, q))
    model_fit = model.fit()

    # Make predictions for the next 5 years
    forecast_years = 5
    forecast = model_fit.forecast(steps=forecast_years)

    # Plot the historical data and the predictions
    plt.figure(figsize=(12, 6))
    plt.plot(df_yearly.index, df_yearly[independent_var], label='Historical Data')
    plt.plot(np.arange(df_yearly.index[-1]+1, df_yearly.index[-1]+1+forecast_years), forecast, label='Predictions', linestyle='--', color='red')
    plt.xlabel('Year')
    plt.ylabel(independent_var)
    plt.title(f'{independent_var} : Historical Data and Predictions')
    plt.legend()
    plt.show()

    return forecast
def psk(x,y):
  Pearson = scipy.stats.pearsonr(x,y)[0]
  Spearmanr = scipy.stats.spearmanr(x,y)[0]
  Kendall = scipy.stats.kendalltau(x,y)[0]

  print("Pearson: " + str(Pearson))
  print("Spearmanr: " + str(Spearmanr))
  print("Kendall: " + str(Kendall))

def visual(x,y):
  slope, intercept, r, p, stderr = scipy.stats.linregress(x,y)
  line = f'Regression line: y={intercept:.2f}+{slope:.2f}x,r={r:.2f}'

  fig, ax = plt.subplots()
  ax.plot(x,y, linewidth=0, marker='s', label='Data points')
  ax.plot(x, intercept + slope * x, label=line)
  ax.set_xlabel('x')
  ax.set_ylabel('y')
  ax.legend(facecolor='white',loc='upper left')
  plt.show
def data_analytics(val, country):
  filtered_data = grouped_data[grouped_data['country'] == country]


  # Preparing data for linear regression
  X = filtered_data[val]
  y = filtered_data['suicides/100k pop']

  # Splitting the data into training and testing sets
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

  # Fitting the linear regression model
  model = LinearRegression()

  X_train_reshaped = X_train.values.reshape(-1, 1)
  y_train_reshaped = y_train.values.reshape(-1, 1)
  X_test_reshaped = X_test.values.reshape(-1, 1)

  model.fit(X_train_reshaped, y_train_reshaped)

  # Making predictions
  y_pred = model.predict(X_test_reshaped)

  # Evaluating the model
  mse = mean_squared_error(y_test, y_pred)
  r2 = r2_score(y_test, y_pred)

  df_preds = pd.DataFrame({'Actual': y_test.squeeze(), 'Predicted': y_pred.squeeze()})


  print(f'Current country being analyzed is {country} and the current independent variable is {val}')
  print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
  print(df_preds.reset_index())
  print()
  print()
  print()
  print()

  print("Mean Squared Error")
  print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
  print(mse)
  print()
  print()
  print()
  print()

  print("R-squared")
  print("-------------------------------------------------------------------------------------------------------------------------------------------------------")
  print(r2)
  print()
  print()
  print()
  print()



  plt.scatter(X_train, y_train,color='b')
  plt.plot(X_test, y_pred,color='k')
  plt.show()


  plt.figure(figsize=(12, 6))
  plt.scatter(df_preds.index, df_preds['Actual'], label='Actual', alpha=0.7)
  plt.scatter(df_preds.index, df_preds['Predicted'], label='Predicted', alpha=0.7)
  plt.xlabel("Index")
  plt.ylabel("Suicides/100k pop")
  plt.title("Actual vs Predicted Suicide Rates")
  plt.legend()
  plt.show()

  residuals = df_preds['Actual'] - df_preds['Predicted']

  plt.figure(figsize=(12, 6))
  plt.scatter(df_preds.index, residuals, alpha=0.7)
  plt.axhline(y=0, color='r', linestyle='--')
  plt.xlabel("Index")
  plt.ylabel("Residuals")
  plt.title("Residual Plot")
  plt.show()
  #sns.pairplot(filtered_data[['suicides/100k pop', 'year', 'gdp_per_capita ($)', 'Anxiety Prevalence (%)']])
  plt.show()

  df_yearly = df.groupby('year')['suicides/100k pop'].mean().reset_index()
  df_yearly.to_csv('df_yearly.csv', index=False)
  # Set the year as the index
  df_yearly.set_index('year', inplace=True)

  # Determine the order of the ARIMA model (p, d, q)
  # You may need to experiment with different values or use an automated approach
  p, d, q = 1, 1, 1

  # Fit the ARIMA model
  model = ARIMA(df_yearly, order=(p, d, q))
  model_fit = model.fit()

  # Make predictions for the next 5 years
  forecast_years = 5
  forecast = model_fit.forecast(steps=forecast_years)

  # Plot the historical data and the predictions
  plt.figure(figsize=(12, 6))
  plt.plot(df_yearly.index, df_yearly['suicides/100k pop'], label='Historical Data')
  plt.plot(np.arange(df_yearly.index[-1]+1, df_yearly.index[-1]+1+forecast_years), forecast, label='Predictions', linestyle='--', color='red')
  plt.xlabel('Year')
  plt.ylabel('Suicides/100k pop')
  plt.title('Suicide Rates: Historical Data and Predictions')
  plt.legend()
  plt.show()
def diff_country(country):
  filtered_data = grouped_data[grouped_data['country'] == country]
  correlation_matrix = filtered_data.corr()
  plt.figure(figsize=(12, 8))
  sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt='.2f')
  plt.title("Correlation Matrix")
  plt.show()
  data_analytics('year', country)
  data_analytics('population', country)
  data_analytics(' gdp_for_year ($) ', country)
  data_analytics('gdp_per_capita ($)', country)
  data_analytics('Anxiety Prevalence (%)', country)
  data_analytics('Unemployment Rate(%)', country)


diff_country('United States')
<ipython-input-42-f5e85508bd51>:180: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation_matrix = filtered_data.corr()
No description has been provided for this image
Current country being analyzed is United States and the current independent variable is year
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index     Actual  Predicted
0    328  11.189108  10.869199
1    329  11.531207  11.111481
2    333  11.817534  12.080610
3    342  14.364077  14.261149




Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.08957859509325357




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.9430894492071287




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
0     13.376665
1     13.556979
2     12.623803
3     11.586396
4     11.212147
        ...    
75    10.903971
76    11.095198
84     4.829425
85     5.241577
95    20.033117
Name: suicides/100k pop, Length: 80, dtype: float64
---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-42-f5e85508bd51> in <cell line: 193>()
    191 
    192 
--> 193 diff_country('United States')
    194 
    195 

<ipython-input-42-f5e85508bd51> in diff_country(country)
    183   plt.title("Correlation Matrix")
    184   plt.show()
--> 185   data_analytics('year', country)
    186   data_analytics('population', country)
    187   data_analytics(' gdp_for_year ($) ', country)

<ipython-input-42-f5e85508bd51> in data_analytics(val, country)
    153   print(df_yearly.head())
    154   # Set the year as the index
--> 155   df_yearly.set_index('year', inplace=True)
    156 
    157   # Determine the order of the ARIMA model (p, d, q)

/usr/local/lib/python3.10/dist-packages/pandas/core/groupby/groupby.py in __getattr__(self, attr)
    985             return self[attr]
    986 
--> 987         raise AttributeError(
    988             f"'{type(self).__name__}' object has no attribute '{attr}'"
    989         )

AttributeError: 'SeriesGroupBy' object has no attribute 'set_index'
In [ ]:
diff_country('Austria')
<ipython-input-132-e3896bfb6725>:124: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation_matrix = filtered_data.corr()
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
5013.347647238664




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.5183757093701598




Current country being analyzed is Austria and the current independent variable is year
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1484.749097
1     16    1489  1465.859206
2     20    1399  1390.299639
3     29    1314  1220.290614




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
4473.375039995245




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.5702500141705459




Current country being analyzed is Austria and the current independent variable is population
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1500.083688
1     16    1489  1478.097520
2     20    1399  1378.275132
3     29    1314  1215.939620




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2939.988661563725




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.7175599912036194




Current country being analyzed is Austria and the current independent variable is  gdp_for_year ($) 
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1535.030735
1     16    1489  1534.364861
2     20    1399  1387.557576
3     29    1314  1231.748483




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2651.2874003561037




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.7452950596482837




Current country being analyzed is Austria and the current independent variable is gdp_per_capita ($)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1533.877062
1     16    1489  1534.642326
2     20    1399  1384.495454
3     29    1314  1240.636157




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
5629.051857749779




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.45922599056130087




Current country being analyzed is Austria and the current independent variable is Anxiety Prevalence (%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1467.543761
1     16    1489  1467.543761
2     20    1399  1401.990834
3     29    1314  1227.183028




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
12741.638088586747




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.22406879348528919




Current country being analyzed is Austria and the current independent variable is Unemployment Rate(%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     15    1588  1419.459192
1     16    1489  1415.995454
2     20    1399  1273.982170
3     29    1314  1273.982170




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
diff_country('Japan')
<ipython-input-132-e3896bfb6725>:124: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation_matrix = filtered_data.corr()
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
5707855.394925548




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.018775807371588793




Current country being analyzed is Japan and the current independent variable is year
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  32954.983755
1    182   29132  32462.965704
2    186   30369  30494.893502
3    195   24357  26066.731047




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
5946874.315872792




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.022313381387096154




Current country being analyzed is Japan and the current independent variable is population
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  28282.626009
1    182   29132  28492.827007
2    186   30369  28889.633548
3    195   24357  28632.361722




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
6038046.361940648




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.037986556529680415




Current country being analyzed is Japan and the current independent variable is  gdp_for_year ($) 
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  28938.871649
1    182   29132  29295.339644
2    186   30369  29019.513130
3    195   24357  28961.521826




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
6031312.843286734




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.03682901294323826




Current country being analyzed is Japan and the current independent variable is gdp_per_capita ($)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  28918.854599
1    182   29132  29295.145945
2    186   30369  29024.927874
3    195   24357  28955.575183




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2062466.1252913745




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.645446228295895




Current country being analyzed is Japan and the current independent variable is Anxiety Prevalence (%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  31062.181585
1    182   29132  31062.181585
2    186   30369  31237.599989
3    195   24357  25975.047867




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2396783.8616983127




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.5879744410325223




Current country being analyzed is Japan and the current independent variable is Unemployment Rate(%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual     Predicted
0    181   29989  29430.351490
1    182   29132  30204.497356
2    186   30369  28717.320088
3    195   24357  26680.090652




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
diff_country('United Kingdom')
<ipython-input-132-e3896bfb6725>:124: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation_matrix = filtered_data.corr()
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
90182.91019529989




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.08938020180968498




Current country being analyzed is United Kingdom and the current independent variable is year
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  3781.956679
1    313    4128  3843.741877
2    317    4047  4090.882671
3    326    4788  4646.949458




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
41970.64624859675




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.49300825420954164




Current country being analyzed is United Kingdom and the current independent variable is population
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  3974.475267
1    313    4128  3881.058459
2    317    4047  4086.711060
3    326    4788  4712.041887




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
63865.17679931215




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.22852945153823756




Current country being analyzed is United Kingdom and the current independent variable is  gdp_for_year ($) 
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  4045.148192
1    313    4128  4037.766636
2    317    4047  4288.792185
3    326    4788  4428.969669




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
67762.14308567054




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.18145536721023037




Current country being analyzed is United Kingdom and the current independent variable is gdp_per_capita ($)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  4160.493019
1    313    4128  4158.807412
2    317    4047  4301.876813
3    326    4788  4353.989369




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
35857.30641989976




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.5668554095286011




Current country being analyzed is United Kingdom and the current independent variable is Anxiety Prevalence (%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  4118.337109
1    313    4128  4100.780103
2    317    4047  4030.552081
3    326    4788  4451.920217




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
69588.78431017889




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.15939013576583083




Current country being analyzed is United Kingdom and the current independent variable is Unemployment Rate(%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0    312    4290  4226.085825
1    313    4128  4195.909163
2    317    4047  4173.779610
3    326    4788  4284.427372




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
diff_country('Finland')
<ipython-input-132-e3896bfb6725>:124: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning.
  correlation_matrix = filtered_data.corr()
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2261.841705059185




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.9159338534850054




Current country being analyzed is Finland and the current independent variable is year
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     90    1165  1179.453971
1     91    1204  1155.069495
2     95     994  1057.531588
3    104     789   838.071300




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
2579.752702998046




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.9041180166509433




Current country being analyzed is Finland and the current independent variable is population
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     90    1165  1159.687475
1     91    1204  1139.003747
2     95     994  1063.320081
3    104     789   824.510582




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
8329.064088695868




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.6904326591702117




Current country being analyzed is Finland and the current independent variable is  gdp_for_year ($) 
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     90    1165  1106.867465
1     91    1204  1102.354892
2     95     994  1010.909636
3    104     789   927.993528




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
10212.643621013736




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.6204254289638276




Current country being analyzed is Finland and the current independent variable is gdp_per_capita ($)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     90    1165  1092.867164
1     91    1204  1089.067913
2     95     994  1004.658467
3    104     789   938.413656




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
7816.6685189323625




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
0.7094769278053794




Current country being analyzed is Finland and the current independent variable is Anxiety Prevalence (%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual    Predicted
0     90    1165  1101.086325
1     91    1204  1101.086325
2     95     994  1078.250412
3    104     789   886.428742




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
Mean Squared Error
-------------------------------------------------------------------------------------------------------------------------------------------------------
35469.26202751885




R-squared
-------------------------------------------------------------------------------------------------------------------------------------------------------
-0.3182903877467005




Current country being analyzed is Finland and the current independent variable is Unemployment Rate(%)
-------------------------------------------------------------------------------------------------------------------------------------------------------
   index  Actual   Predicted
0     90    1165  942.768684
1     91    1204  956.616986
2     95     994  971.867632
3    104     789  964.505251




No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [ ]:
usa = df.loc[df['country'] == 'United States']

y = usa['suicides_no']
x = usa['Unemployment Rate(%)']

psk(x,y)
visual(x,y)
Pearson: 0.5229669784472624
Spearmanr: 0.5537561233777729
Kendall: 0.3025316904537665
No description has been provided for this image
In [ ]:
x = usa[' gdp_for_year ($) ']

psk(x,y)
visual(x,y)
Pearson: 0.9694943115190687
Spearmanr: 0.988235294117647
Kendall: 0.9500000000000001
No description has been provided for this image
In [ ]:
x = usa['population']

psk(x,y)
visual(x,y)
Pearson: 0.9856287427902889
Spearmanr: 0.9970588235294118
Kendall: 0.9833333333333333
No description has been provided for this image
In [ ]:
x = usa['Anxiety Prevalence (%)']

psk(x,y)
visual(x,y)
Pearson: -0.9728282957877852
Spearmanr: -0.9970588235294118
Kendall: -0.9833333333333333
No description has been provided for this image
In [ ]:
UK = df.loc[df['country'] == 'United Kingdom']

y = UK['suicides_no']
x = UK['Unemployment Rate(%)']

psk(x,y)
visual(x,y)
Pearson: 0.3784882272862713
Spearmanr: 0.6294117647058822
Kendall: 0.38333333333333336
No description has been provided for this image
In [ ]:
x = UK[' gdp_for_year ($) ']

psk(x,y)
visual(x,y)
Pearson: 0.3911777286517507
Spearmanr: 0.31176470588235294
Kendall: 0.25
No description has been provided for this image
In [ ]:
x = UK['population']

psk(x,y)
visual(x,y)
Pearson: 0.8574078006265712
Spearmanr: 0.7411764705882353
Kendall: 0.5666666666666667
No description has been provided for this image
In [ ]:
x = UK['Anxiety Prevalence (%)']

psk(x,y)
visual(x,y)
Pearson: 0.6762483923632745
Spearmanr: 0.7016317042084812
Kendall: 0.4852162225324867
No description has been provided for this image
In [ ]:
Austria = df.loc[df['country'] == 'Austria']

y = Austria['suicides_no']
x = Austria['Unemployment Rate(%)']

psk(x,y)
visual(x,y)
Pearson: -0.5903485553040041
Spearmanr: -0.3873347352349848
Kendall: -0.235302425908485
No description has been provided for this image
In [ ]:
x = Austria[' gdp_for_year ($) ']

psk(x,y)
visual(x,y)
Pearson: -0.9296965158168207
Spearmanr: -0.6764705882352942
Kendall: -0.5166666666666667
No description has been provided for this image
In [ ]:
x = Austria['population']

psk(x,y)
visual(x,y)
Pearson: -0.8643322853027127
Spearmanr: -0.7794117647058825
Kendall: -0.6333333333333333
No description has been provided for this image
In [ ]:
x = Austria['Anxiety Prevalence (%)']

psk(x,y)
visual(x,y)
Pearson: -0.8478652313303219
Spearmanr: -0.7770421529709435
Kendall: -0.6276205565667923
No description has been provided for this image
In [ ]:
Japan= df.loc[df['country'] == 'Japan']

y = Japan['suicides_no']
x = Japan['Unemployment Rate(%)']

psk(x,y)
visual(x,y)
Pearson: 0.6664818726262071
Spearmanr: 0.39882277169531377
Kendall: 0.32636268941473195
No description has been provided for this image
In [ ]:
x = Japan[' gdp_for_year ($) ']

psk(x,y)
visual(x,y)
Pearson: -0.1445703007592053
Spearmanr: -0.1647058823529412
Kendall: -0.13333333333333333
No description has been provided for this image
In [ ]:
x = Japan['population']

psk(x,y)
visual(x,y)
Pearson: 0.06208728311481601
Spearmanr: 0.06176470588235294
Kendall: 0.03333333333333333
No description has been provided for this image
In [ ]:
x = Japan['Anxiety Prevalence (%)']

psk(x,y)
visual(x,y)
Pearson: 0.8257599841197141
Spearmanr: 0.6879398467484049
Kendall: 0.5753670112546582
No description has been provided for this image
In [ ]:
Finland= df.loc[df['country'] == 'Finland']

y = Finland['suicides_no']
x = Finland['Unemployment Rate(%)']

psk(x,y)
visual(x,y)
Pearson: 0.09726113184467595
Spearmanr: 0.3294117647058824
Kendall: 0.25
No description has been provided for this image
In [ ]:
x = Finland[' gdp_for_year ($) ']

psk(x,y)
visual(x,y)
Pearson: -0.7187050203184171
Spearmanr: -0.7382352941176471
Kendall: -0.5833333333333334
No description has been provided for this image
In [ ]:
x = Finland['population']

psk(x,y)
visual(x,y)
Pearson: -0.9497484372918075
Spearmanr: -0.9529411764705882
Kendall: -0.8499999999999999
No description has been provided for this image
In [ ]:
x = Finland['Anxiety Prevalence (%)']

psk(x,y)
visual(x,y)
Pearson: -0.8413197138786257
Spearmanr: -0.9565240766177093
Kendall: -0.8692679567468138
No description has been provided for this image